from itertools import chain

from wikitextprocessor import HTMLNode, NodeKind, TemplateNode, WikiNode

# from wikitextprocessor.parser import print_tree
from wiktextract.page import clean_node
from wiktextract.wxr_context import WiktextractContext
from wiktextract.wxr_logging import logger

from .models import Form, WordEntry
from .simple_tags import simple_tag_map
from .tags_utils import convert_tags

# Shorthand used in this file for one element of a parse tree's children:
# either a plain string or a WikiNode. Could be imported instead, but it is
# simple enough to define locally.
Node = str | WikiNode


# node_fns are different from template_fns. template_fns are functions that
# are used to handle how to expand (and otherwise process) templates, while
# node functions are used when turning parsed nodes into strings.
def cell_node_fn(
    node: WikiNode,
) -> list[Node] | None:
    """Node handler used when turning table cells into text.

    Returns a replacement list of nodes for `node`, or None when this
    function has nothing special to say about it:

    * an italic node whose only child is the string 'none' (ignoring
      surrounding whitespace) is replaced by an en dash;
    * table cells and table header cells are replaced by their children.
    """
    assert isinstance(node, WikiNode)
    node_kind = node.kind
    kids = node.children

    if node_kind == NodeKind.ITALIC:
        # Italicized 'none' (as in the table for `deviate`) marks a missing
        # form; represent it with "–".
        # XXX 'None' without italics is not handled.
        only_child_is_none = (
            len(kids) == 1
            and isinstance(kids[0], str)
            and kids[0].strip() == "none"
        )
        return ["–"] if only_child_is_none else None

    # Mirrors the default node handler; only really matters if someone nests
    # a table inside a table.
    if (
        node_kind == NodeKind.TABLE_CELL
        or node_kind == NodeKind.TABLE_HEADER_CELL
    ):
        return kids
    return None


# =====================
# Table parsing example
# =====================
def parse_pos_table(
    wxr: WiktextractContext, tnode: TemplateNode, data: WordEntry
) -> list[Form]:
    """Parse an inflection table produced by a template into Form entries.

    In the original extractors, tables are only parsed for inflection tables
    and others that generate 'form' data; 'foos' is a form of 'foo' with the
    tags ['plural'].

    Args:
        wxr: Extraction context; its `wtp` is used to expand and parse the
            template, and `config.expand_tables` optionally names a debug
            dump file.
        tnode: The template node that generates the table.
        data: The word entry passed on to `clean_node` while cell text is
            extracted.

    Returns:
        One Form per non-empty data cell. Its raw tags are the header
        recorded for the cell's index (if any) followed by the row header
        (if any); raw tags are then replaced via `convert_tags` whenever
        that yields at least one recognized tag.
    """
    # Tables are almost always generated by templates, but not always. This
    # code assumes the edition being parsed does that.
    assert isinstance(tnode, TemplateNode)
    # Expand the template into text (and all subtemplates too), then parse.
    tree = wxr.wtp.parse(wxr.wtp.node_to_wikitext(tnode), expand_all=True)

    # Some debugging code: if wiktwords is passed a --inflection-tables-file
    # argument, we save tables to a file for debugging purposes, or for just
    # getting tables that can be used as test data.
    # NOTE: the file is opened in "w" mode, so every call truncates it and
    # only the most recently parsed table is kept.
    if wxr.config.expand_tables:
        # Wikitext is routinely non-ASCII; do not depend on the locale's
        # default encoding.
        with open(wxr.config.expand_tables, "w", encoding="utf-8") as f:
            f.write(f"{wxr.wtp.title=}\n")
            wikitext = wxr.wtp.node_to_wikitext(tree)
            f.write(f"{wikitext}\n")

    # Check if there are actually any headers. This example was taken originally
    # from the Simple English extractor, and the SE edition doesn't use headers.
    # Having headers is better than not, but when they're inconsistently
    # applied, it's a headache.
    for header in tree.find_child_recursively(NodeKind.TABLE_HEADER_CELL):
        # Collect some data on what kind of headers are present in tables.
        wxr.wtp.debug(
            f"POS template table has headers! {repr(header)[:255]}",
            sortid="simple/table/45",
        )

    # Let's assume we have a simple table with one row of headers on top and
    # a column of headers on the left side of the table.
    # +-----------+----------+--------+
    # |           | Singular | Plural |
    # +-----------+----------+--------+
    # | Foomative | foo      | foos   |
    # | Barative  | bar      | bars   |
    # | Bazative  | baz      | bazes  |
    # +-----------+----------+--------+
    column_hdrs: dict[int, str] = {}
    forms: list[Form] = []
    for row in chain(
        # This just combines these two (normally mutually exclusive) calls
        # into one sequence, with an expectation that we get only wikitext
        # rows or only HTML rows. If they're mixed up, that's super weird.
        # It's a hack!
        tree.find_child_recursively(NodeKind.TABLE_ROW),
        tree.find_html_recursively("tr"),
    ):
        # The header seen so far on this row; reset for every row.
        row_hdr = ""
        for i, cell in chain(
            row.find_child(
                NodeKind.TABLE_HEADER_CELL | NodeKind.TABLE_CELL,
                with_index=True,
            ),
            row.find_html(
                ["th", "td"], with_index=True, attr_name="", attr_value=""
            ),
        ):
            is_header = (
                isinstance(cell, WikiNode)
                and cell.kind == NodeKind.TABLE_HEADER_CELL
            ) or (isinstance(cell, HTMLNode) and cell.tag == "th")

            # Extract the text of *every* cell. Previously only header cells
            # were cleaned, so data cells reused the last header's text (or
            # hit an unbound variable when no header preceded them).
            text = clean_node(
                wxr, data, cell, node_handler_fn=cell_node_fn
            ).strip()

            if not is_header:
                if not text:
                    # An empty data cell carries no form.
                    continue
                tags = []
                column_hdr = column_hdrs.get(i)
                if column_hdr:
                    tags.append(column_hdr)
                if row_hdr:
                    tags.append(row_hdr)
                forms.append(Form(form=text, raw_tags=tags))
                continue

            # Header cell: record it in row_hdr and in its column_hdrs slot.
            if not text:
                # In case there's an empty cell on the first row.
                column_hdrs.setdefault(i, "")
                continue
            lines = [s.strip() for s in text.splitlines()]
            if len(lines) != 1:
                # SEW style: a single cell, first line is the 'header',
                # second is the form/data.
                logger.debug(
                    f"{wxr.wtp.title}: A header cell that's "
                    f"not exactly 1 line: {repr(text)}"
                )
                # Headers with more than one line are not handled yet.
                continue
            # XXX do tag parsing instead of i == 0; Levenshtein.
            if text in simple_tag_map:
                # Found something that looks like a tag. It is stored both as
                # the current row header and as the header for this index,
                # because we do not distinguish the two kinds here.
                row_hdr = text
                column_hdrs[i] = text

    # logger.debug(
    #     f"{wxr.wtp.title}\n{print_tree(tree, indent=2, ret_value=True)}"
    # )
    # print(forms)

    # Replace raw_tags with tags if appropriate
    for form in forms:
        legit_tags, new_raw_tags, poses = convert_tags(form.raw_tags)
        # Poses are strings like "adj 1", used in pronunciation data
        # to later associate sound data with the correct pos entry.
        # NOTE(review): this comment used to say poses are ignored here, but
        # the code appends them to the form's tags whenever any legit tag
        # was found -- confirm which is intended.
        if legit_tags:
            form.tags = legit_tags
            form.tags.extend(poses)
            form.raw_tags = new_raw_tags

    return forms
